* ---- Session setup ----------------------------------------------------------
* Start from an empty session and disable the --more-- pager.
clear all
set more off

* All relative paths below ("Data/...", "Figures/...") resolve against this folder.
cd "~/Dropbox/Indonesia Migration/" // Specify your root directory here

* Graph look: initialise grstyle, then request the plain style with grid lines.
grstyle init
grstyle set plain, grid

/*******************************************************************************
						INPRES QUESTION
********************************************************************************/
* Load the INPRES data set.
use "Data/inpresdata.dta"

* Binned scatter of yeduc against p504thn, one series per value of recp,
* absorbing birthpl and controlling for ch71, en71, wsppc and dens71.
* x-axis ticks at 50..70 are relabelled as calendar years 1950..1970.
* A thin reference line is drawn at 65, and wide bluish-gray lines at 62-66.
binscatter yeduc p504thn, ///
    by(recp) ///
    absorb(birthpl) ///
    controls(ch71 en71 wsppc dens71) ///
    line(connect) ///
    ytitle("Year of education") ///
    xtitle("Year of birth") ///
    xlabel(50 "1950" 55 "1955" 60 "1960" 65 "1965" 70 "1970") ///
    xline(65) ///
    xline(62 63 64 65 66, lwidth(8) lcolor(bluishgray)) ///
    legend(cols(2) order(1 "Low INPRES Intensity" 2 "High Inpres Intensity"))

* Write the figure to disk, overwriting any earlier export.
graph export "Figures/INPRES_binscatter.pdf", as(pdf) replace

* Residualize each variable on birthpl indicators plus the four controls
* (ch71 en71 wsppc dens71); the residuals are stored as <variable>_res.
foreach v of varlist yeduc p504thn {
    quietly regress `v' i.birthpl ch71 en71 wsppc dens71
    predict `v'_res, residuals
}

* Local-polynomial smooths (with 90% confidence bands) of residualized
* education against p504thn, drawn separately for recp==1 and recp==0.
*
* Legend keys: each lpolyci adds two plots, the CI band first and the smooth
* second, so the plots are numbered
*   1 = CI band (recp==1)   2 = smooth (recp==1)
*   3 = CI band (recp==0)   4 = smooth (recp==0)
* The legend therefore uses the two smooth lines (4 and 2). The earlier
* order(3 ... 2 ...) paired the "Low" label with a CI-band key and the
* "High" label with a line key.
twoway ///
    (lpolyci yeduc_res p504thn if recp==1, level(90)) ///
    (lpolyci yeduc_res p504thn if recp==0, level(90)), ///
    ytitle("Year of education (residual)") ///
    xtitle("Year of birth") ///
    xlabel(50 "1950" 55 "1955" 60 "1960" 65 "1965" 70 "1970") ///
    xline(65) ///
    xline(62 63 64 65 66, lwidth(8) lcolor(bluishgray)) ///
    legend(cols(2) order(4 "Low INPRES Intensity" 2 "High Inpres Intensity"))
graph export "Figures/INPRES_lpoly.pdf", as(pdf) replace

/*******************************************************************************
********************************************************************************/

cd "~/Dropbox/Indonesia Migration/" // Specify your root directory here
/*******************************************************************************
Generating the percentage variables used to construct the IV
********************************************************************************/
* Origin-province codes, defined ONCE. Every per-origin variable and the
* district-level row total below are built from this list, so adding or
* removing a province only requires editing this line.
local origins 360011 360012 360013 360014 360015 360016 360017 360018 ///
    360031 360032 360033 360034 360035 360051 360052 360053 360054 ///
    360061 360062 360063 360064 360071 360072 360073 360074 360081 ///
    360094

* One pass per survey wave. Each pass ends by saving a respondent-level
* tempfile whose local macro is named <year>_all_5Y_weight; the appending
* step later in this file reads those macros.
foreach t in 1976 1980 1985 1990 1995 2000 2005 2010 {
    use "Data/Census_`t'.dta", clear
    keep year geolev1 geolev2 origin bpl migrate5 migtype age sex edattain perwt

    label var geolev1 "Residence province at the time of the survey"
    label var geolev2 "Residence district at the time of the survey"
    label var origin "Residence province five years ago"
    label var bpl "Province of birth"

    * Resident population of each district (sum of perwt within district-year)
    bysort geolev2 year: egen pop_cst = total(perwt)

    * Filled inside the loop: for a respondent whose origin is i, the share of
    * origin i's migrants that live in the respondent's residence district.
    generate per_ict_5Y = .
    local pop_ct_vars                     // collects pop_<i>ct_5Y for rowtotal()

    foreach i of local origins {
        * Origin province-specific migrant flag (0/1)
        generate byte migrant_`i'_5Y = (origin == `i' & migtype == 1)

        * Total migrants from origin i in the residence district
        bysort geolev2 year: egen pop_`i'ct_5Y = total(migrant_`i'_5Y*perwt)

        * Total migrants from origin i across all districts
        bysort year: egen pop_`i't_5Y = total(migrant_`i'_5Y*perwt)

        * Share of origin i's migrants located in each district
        * (missing when origin i sends no migrants at all in this wave)
        generate per_`i'c_5Y = pop_`i'ct_5Y/pop_`i't_5Y

        replace per_ict_5Y = per_`i'c_5Y if origin == `i'
        local pop_ct_vars `pop_ct_vars' pop_`i'ct_5Y
    }

    * Total migrants in the district = sum over all origin provinces
    egen total_immig_cst_5Y = rowtotal(`pop_ct_vars')

    * ... expressed relative to the district's resident population. The by-prefix
    * is not needed for this row-wise ratio; it is kept only so the saved file
    * has the same sort order (geolev2 year) as before.
    bysort geolev2 year: gen per_immig_cst_5Y = total_immig_cst_5Y/pop_cst

    * Across-province move indicator (migrate5 codes 20 and 30), 0 otherwise
    generate mvmt = inlist(migrate5, 20, 30)

    keep year geolev2 origin migrate5 per_ict_5Y per_immig_cst_5Y ///
        pop_cst migtype age sex edattain geolev1 mvmt perwt

    label var migtype "New/recent across-province migrant (old migrants, natives = 0)"
    label var per_ict_5Y "% migrants in the residence district from origin province"
    * The label previously read "Total new migrants in the residence district",
    * but the variable is total_immig_cst_5Y divided by pop_cst, i.e. a share.
    label var per_immig_cst_5Y "New migrants as a share of residence district population"
    label var pop_cst "Residence district population"
    label var mvmt "New/recent across-province migrant (old migrants, natives = 0)"

    * Respondent-level survey-year file. The path is quoted so that a temporary
    * directory containing spaces does not break the save.
    tempfile `t'_all_5Y_weight
    save "``t'_all_5Y_weight'"
}
/*******************************************************************************
End of generating the percentage variables used to construct the IV
********************************************************************************/
/*******************************************************************************
Matching each migrant with their percentage (origin-destination) group. 
********************************************************************************/
* Stack the respondent-level files of all survey years into one data set.
* Tempfile paths are wrapped in double quotes so that a temporary directory
* containing spaces does not split the path, and `clear` makes the load
* independent of whatever is currently in memory.
use "`1976_all_5Y_weight'", clear
foreach t in 1980 1985 1990 1995 2000 2005 2010 {
    append using "``t'_all_5Y_weight'" // Respondent-level survey year files
}

save "Data_EDCC_Revision/Outmigration.dta", replace

* Province-by-year outmigration rate.
* Denominator: weighted population of each origin province, including those
* who later migrated (computed before movers are isolated).
bysort year origin: egen population_it = total(perwt)

* Keep movers only, then sum their weights to get the outflow from each origin
* between t-1 and t.
keep if mvmt != 0
bysort year origin: egen outflow_it = total(mvmt*perwt)
generate outmigration_percent = outflow_it*100/population_it

* Collapse to one row per origin-year (the rate is constant within each cell).
bysort year origin: keep if _n == 1
keep origin year outmigration_percent // percentage outflows from each province between two survey waves

* Open question from the original author: why are there origins with 100% migrant population?
drop if outmigration_percent == 100

* Scatter of outmigration rates, one marker symbol per survey year. The x-axis
* is a running index over (year, origin) cells: group() numbers the cells in
* sorted order of year then origin, so each year occupies a contiguous run.
sort year origin
egen year_origin = group(year origin)

* Vertical separators between consecutive survey years, placed half a unit
* after the last cell of every year except the final one. They are computed
* from the data rather than hard-coded, so they stay correct if the set of
* origin-year cells changes (e.g. after the 100% rows are dropped).
quietly levelsof year, local(survey_years)
local n_years : word count `survey_years'
local separators
local k = 0
foreach y of local survey_years {
    local ++k
    if `k' < `n_years' {
        quietly summarize year_origin if year == `y', meanonly
        local separators `separators' `= r(max) + 0.5'
    }
}
* Only pass xline() when there is at least one boundary to draw.
local sep_opt
if "`separators'" != "" local sep_opt xline(`separators')

twoway ///
    (scatter outmigration_percent year_origin if year == 1976) ///
    (scatter outmigration_percent year_origin if year == 1980, msymbol(D)) ///
    (scatter outmigration_percent year_origin if year == 1985, msymbol(T)) ///
    (scatter outmigration_percent year_origin if year == 1990, msymbol(S)) ///
    (scatter outmigration_percent year_origin if year == 1995, msymbol(+)) ///
    (scatter outmigration_percent year_origin if year == 2000, msymbol(X)) ///
    (scatter outmigration_percent year_origin if year == 2005, msymbol(oh)) ///
    (scatter outmigration_percent year_origin if year == 2010, msymbol(dh)), ///
    `sep_opt' xtitle("") ///
    ytitle("Outmigration (% of origin province's population)") ///
    legend(order(1 "Year 1976" 2 "Year 1980" 3 "Year 1985" ///
    4 "Year 1990" 5 "Year 1995" 6 "Year 2000" 7 "Year 2005" ///
    8 "Year 2010") row(2)) xla("")

graph export "Figures/Outmigration_dispersion.pdf", as(pdf) replace
/*******************************************************************************
Demonstrating shocks (shifts) are independent
********************************************************************************/
* Reload the stacked respondent-level file and rebuild the province-by-year
* outmigration rate from scratch.
use "Data_EDCC_Revision/Outmigration.dta", clear

* Denominator: weighted population of each origin province, including those
* who later migrated.
bysort year origin: egen population_it = total(perwt)

* Numerator: weighted count of movers (outflow between t-1 and t).
keep if mvmt != 0
bysort year origin: egen outflow_it = total(mvmt*perwt)
generate outmigration_percent = outflow_it*100/population_it

* One row per origin-year; the rate is constant within each cell.
bysort year origin: keep if _n == 1
keep origin year outmigration_percent // percentage outflows from each province between two survey waves

* Open question from the original author: why are there origins with 100% migrant population?
drop if outmigration_percent == 100


* Build a data set with one row per (year, origin, other origin):
*   outmigration_percent_origin = rate of the row's own origin
*   outmigration_percent        = rate of the other origin in the same year
* `id` is a constant that only serves as the reshape/merge key.
gen id = 1
tempfile all
save "`all'"

* Wide: one row per year, one outmigration_percent<code> column per origin.
reshape wide outmigration_percent, i(id year) j(origin)

* Attach the wide row of each year to every origin-year row of the long file.
* The tempfile path is quoted so a temporary directory containing spaces does
* not break the merge; nogenerate replaces the separate `drop _merge`.
merge 1:m id year using "`all'", nogenerate
rename outmigration_percent outmigration_percent_origin

* Back to long over the other origins. reshape long only picks up stub
* variables with a numeric suffix, so outmigration_percent_origin is left alone.
reshape long outmigration_percent, i(id year origin) j(other_origins)

* Blank out the own-rate on self-pairs so they drop out of the regression below.
replace outmigration_percent_origin = . if origin == other_origins


* Regress other origins' outmigration rate on the own-origin rate and assemble
* the fitted equation as a string (local `eq') for the graph note.
regress outmigration_percent outmigration_percent_origin

* Significance stars are derived from the two-sided t-test p-values instead of
* being hard-coded. Thresholds follow the usual convention
* (* p<0.10, ** p<0.05, *** p<0.01) -- NOTE(review): confirm against the
* paper's convention. A missing p-value yields no stars.
local p_cons  = 2 * ttail(e(df_r), abs(_b[_cons] / _se[_cons]))
local p_slope = 2 * ttail(e(df_r), abs(_b[outmigration_percent_origin] / _se[outmigration_percent_origin]))
foreach k in cons slope {
    local stars_`k'
    if `p_`k'' < 0.10 local stars_`k' "*"
    if `p_`k'' < 0.05 local stars_`k' "**"
    if `p_`k'' < 0.01 local stars_`k' "***"
}

local eq `"Outmigration{subscript:-o,t} ="'                                 // dependent variable
local eq "`eq' `: di  %4.2f _b[_cons]'`stars_cons'"                          // constant, two decimals, with stars
local eq `"`eq' `=cond(_b[outmigration_percent_origin]>0, "+", "-")'"'      // sign of the slope term
// The sign was chosen above, so print the absolute value of the slope
local eq `"`eq' `:di %4.2f abs(_b[outmigration_percent_origin])'`stars_slope' Outmigration{subscript:o,t}"'
local eq `"`eq' + {&epsilon}{subscript:o,t}"'                               // error term
local eq "`eq'; R{superscript:2} = `: di  %4.2f e(r2)'"                     // R-squared


* Scatter of other-origin rates against own-origin rates, with a linear fit
* and its confidence band. The fitted equation held in local `eq' is printed
* as the graph note; the legend shows only the scatter key (plot 1).
twoway ///
    (scatter outmigration_percent outmigration_percent_origin) ///
    (lfitci outmigration_percent outmigration_percent_origin), ///
    note("`eq'") ///
    xtitle("Outmigration{subscript:o,t}") ///
    ytitle("Outmigration{subscript:-o,t}") ///
    legend(order(1 "Correlation b/w outmigration from different origins in a year"))
graph export "Figures/Outmigrationshock_independence.pdf", as(pdf) replace
